# AMAR ET AL. - COUNTERING MISINFORMATION EARLY (2025)
## REPLICATION FILE: 08_descriptives.R
### This script creates descriptive statistics and plots for endline and follow-up outcomes.
# ----
# Endline outcomes ----
## Correlation matrix ----
# Correlation matrix of main outcomes
main_outcomes <- dv_labs %>%
  filter(type == "idx" & secondary_outcome == FALSE) %>%
  mutate(
    dv = factor(dv, levels = c("accuracy_discernment",
                               "sharing_discernment",
                               "source_discernment",
                               "health_preferences",
                               "engagement_attitude",
                               "engagement_behavior",
                               "awareness"))) %>%
  pull(dv)

cor_mat <- bimli_svy_dkr.rm$variables %>%
  select(all_of(main_outcomes)) %>%
  cor(use = "pairwise.complete.obs")

name_map <- c(
  engagement_behavior  = "Engagement\nBehavior",
  engagement_attitude  = "Engagement\nAttitude",
  source_discernment   = "Source\nDiscernment",
  health_preferences   = "Health\nPreferences",
  sharing_discernment  = "Sharing\nDiscernment",
  accuracy_discernment = "Accuracy\nDiscernment",
  awareness = "Awareness")

# Grab only the lower‐triangle 
tri <- which(upper.tri(cor_mat, diag = TRUE), arr.ind = TRUE)
cor_df <- tibble(
  Var1 = rownames(cor_mat)[tri[,1]],
  Var2 = colnames(cor_mat)[tri[,2]],
  r    = cor_mat[tri]
) %>%
  # force the plotting order
  mutate(
    Var1 = factor(Var1, levels = names(name_map)),
    Var2 = factor(Var2, levels = names(name_map))
  )

# Plot
ggplot(cor_df, aes(x = Var2, y = Var1)) +
  geom_tile(aes(fill = r), color = "white") +
  geom_text(
    aes(
      label  = sprintf("%.2f", r),
      colour = ifelse(r > 0.9, "white", "black")
    ),
    hjust    = 0.5,
    size     = 3,
    fontface = "bold",
    show.legend = FALSE   
  ) +
  scale_colour_identity(guide = "none") +
  scale_x_discrete(
    expand   = c(0, 0),
    labels   = name_map,
    limits   = rev(names(name_map)),
    position = "top"
  ) +
  scale_y_discrete(
    expand = c(0, 0),
    labels = name_map,
    position = "right"
  ) +
  scale_fill_gradient2(
    low      = "red3",
    mid      = "white",
    high     = "black",
    midpoint = 0,
    limits   = c(-1, 1),
    name     = "r") +
  coord_fixed() +
  theme_minimal(base_size = 14) +
  theme(
    panel.grid       = element_blank(),
    axis.title       = element_blank(),
    axis.text.y      = element_text(hjust = 1, vjust = 0.5, size = 11),
    axis.text.x.top  = element_text(angle = 90, hjust = 0, vjust = 0.5, size = 11),
    legend.position="none"
  )

ggsave("output/figures/correlation_matrix.pdf", height = 5, width = 5)

## Descriptive plots ----
### Function to make factor ----
# fct_case_when function makes a factor with levels in the same order as case when
fct_case_when <- function(...) {
  args <- as.list(match.call())
  levels <- sapply(args[-1], function(f) f[[3]])  # extract RHS of formula
  levels <- levels[!is.na(levels)]
  factor(dplyr::case_when(...), levels=levels)
}

### Set colors for bar plots ----
barplot_colors <- c("#2066a8", "#ea801c")

### Make plots ----
#### Accuracy discernment ----
statement_labels <- c(
  "False1" = "Cow urine cures \nCOVID-19",
  "False2" = "Papaya leaves \ncure dengue",
  "False3" = "Mobile towers \ncause cancer",
  "False4" = "Exorcism treats \nsnake bites",
  "True1" = "Seat belts reduce \ncrash injuries",
  "True2" = "COVID-19 vaccine \nis effective",
  "True3" = "Handwashing \nprevents infections",
  "True4" = "Smoking causes \nlung cancer"
)

bimli_svy %>%
  select(treatment, discernment1:discernment10) %>%
  mutate(across(-treatment, \(x) case_when(
    abs(x) %in% c(1, 2) ~ 1,
    abs(x) %in% c(3, 4) ~ 0,
    TRUE ~ NA
  ))) %>%
  rename_with(~set_names(c(str_c("False", 1:4), str_c("True", 1:4))), -treatment) %>%
  group_by(treatment) %>%
  summarize(across(everything(), \(x) survey_mean(x, na.rm = TRUE))) %>%
  pivot_longer(-treatment) %>%
  mutate(statement = str_replace(name, "_se", ""),
         name = if_else(str_detect(name, "_"), "se", "prop")) %>%
  pivot_wider(id_cols = c(treatment, statement)) %>%
  mutate(margin = qnorm(0.975) * se,
         truth = if_else(str_detect(statement, "True"), 
                         "True Statements", "False Statements")) %>%
  ggplot(aes(x = statement, y = prop, fill = treatment)) +
  geom_col(position = "dodge") +
  geom_errorbar(aes(ymin = prop - margin, ymax = prop + margin), 
                position = position_dodge(0.9), width = 0.1) +
  facet_grid(cols = vars(truth), scales = "free_x", switch = "y") +
  ggtitle("") +
  xlab("") +
  scale_x_discrete(labels = statement_labels) +  # Use statement_lab as x-axis labels
  scale_y_continuous("Proportion rating statement as accurate", 
                     labels = scales::percent, limits = c(0, 1)) +
  scale_fill_discrete("", type = barplot_colors, 
                      labels = c("Control", "Treatment")) +
  theme_bw() +
  theme(
    plot.title.position = "plot",
    plot.title = element_text(size = 12, hjust = 0),
    strip.text = element_text(size = 10),
    strip.placement = "top",
    legend.position = "bottom",
    legend.margin = margin(t = -10),
    legend.spacing.x = unit(0.5, 'cm'),
    axis.text.x = element_text(angle = 50, hjust = 1, vjust = 1, size = 9)
  )
ggsave("output/figures/accuracy_discernment.pdf",  height = 5, width = 6)

#### Sharing discernment ----
bimli_svy %>%
  select(treatment, sharing1:sharing10) %>%
  mutate(across(-treatment, \(x) case_when(
    abs(x) == 1 ~ 1,
    abs(x) %in% c(2, 3) ~ 0,
    TRUE ~ NA
  ))) %>%
  rename_with(~set_names(c(str_c("False", 1:4), 
                           str_c("True", 1:4))), -treatment) %>%
  group_by(treatment) %>%
  summarize(across(everything(), \(x) survey_mean(x, na.rm = TRUE))) %>%
  pivot_longer(-treatment) %>%
  mutate(statement = str_replace(name, "_se", ""),
         name = if_else(str_detect(name, "_"), "se", "prop")) %>%
  pivot_wider(id_cols = c(treatment, statement)) %>%
  mutate(margin = qnorm(0.975) * se,
         truth = if_else(str_detect(statement, "True"), 
                         "True Statements", "False Statements")) %>%
  ggplot(aes(x = statement, y = prop, fill = treatment)) +
  geom_col(position = "dodge") +
  geom_errorbar(aes(ymin = prop - margin, ymax = prop + margin), 
                position = position_dodge(0.9), width = 0.1) +
  facet_grid(cols = vars(truth), scales = "free_x", switch = "y") +
  ggtitle("") +
  xlab("") +
  scale_x_discrete(labels = statement_labels) +  # Use statement_lab as x-axis labels
  scale_y_continuous("Proportion rating statement as accurate", 
                     labels = scales::percent, limits = c(0, 1)) +
  scale_fill_discrete("", type = barplot_colors, 
                      labels = c("Control", "Treatment")) +
  theme_bw() +
  theme(
    plot.title.position = "plot",
    plot.title = element_text(size = 12, hjust = 0),
    strip.text = element_text(size = 10),
    strip.placement = "top",
    legend.position = "bottom",
    legend.margin = margin(t = -10),
    legend.spacing.x = unit(0.5, 'cm'),
    axis.text.x = element_text(angle = 50, hjust = 1, vjust = 1, size = 9))
ggsave("output/figures/sharing_discernment.pdf", height = 5, width = 8)

## Half violin plots ----
### Function ----
GeomSplitViolin <- ggproto(
  "GeomSplitViolin", 
  GeomViolin, 
  draw_group = function(self, data, ..., draw_quantiles = NULL) {
    data <- transform(data, 
                      xminv = x - violinwidth * (x - xmin), 
                      xmaxv = x + violinwidth * (xmax - x))
    grp <- data[1,'group']
    newdata <- plyr::arrange(
      transform(data, x = if(grp%%2==1) xminv else xmaxv), 
      if(grp%%2==1) y else -y
    )
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    newdata[c(1,nrow(newdata)-1,nrow(newdata)), 'x'] <- round(newdata[1, 'x']) 
    if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
      aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      ggplot2:::ggname("geom_split_violin", 
                       grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
    } else {
      ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  }
)

geom_split_violin_custom <- function (mapping = NULL, 
                                      data = NULL, 
                                      stat = "ydensity", 
                                      position = "identity", ..., 
                                      draw_quantiles = NULL, 
                                      trim = TRUE, 
                                      scale = "area", 
                                      na.rm = FALSE, 
                                      show.legend = NA, 
                                      inherit.aes = TRUE) {
  layer(data = data, 
        mapping = mapping, 
        stat = stat, 
        geom = GeomSplitViolin, 
        position = position, 
        show.legend = show.legend, 
        inherit.aes = inherit.aes, 
        params = list(trim = trim, 
                      scale = scale, 
                      draw_quantiles = draw_quantiles, 
                      na.rm = na.rm, ...)
  )
}

get_legend_custom <- function (plot, pattern = "guide-box-bottom", return_all = FALSE) 
{
  plot <- as_gtable(plot)
  grob_names <- plot_component_names(plot)
  grobs <- plot_components(plot)
  grobIndex <- which(grepl(pattern, grob_names))
  if (length(grobIndex) != 0) {
    if (length(grobIndex) > 1 && !return_all) {
      warning("Multiple components found; returning the first one. To return all, use `return_all = TRUE`.")
      grobIndex <- grobIndex[1]
      matched_grobs <- grobs[[grobIndex]]
    }
    else if (length(grobIndex) > 1 && return_all) {
      matched_grobs <- grobs[grobIndex]
    }
    else {
      matched_grobs <- grobs[[grobIndex]]
    }
  }
  else {
    matched_grobs <- NULL
  }
  invisible(matched_grobs)
}

# Pull the variable names
outcome_vars <- dv_labs_endline_main$dv

# Reshape bimli from wide to long using the outcome variable names
bimli_long_main_dvs <- bimli %>%
  select(treatment_num, all_of(outcome_vars)) %>%
  pivot_longer(cols = all_of(outcome_vars), names_to = "dv", values_to = "value") %>%
  left_join(dv_labs_endline_main, by = "dv") %>%
  mutate(treatment_label = factor(treatment_num, labels = c("Control", "Treatment"))) %>%
  filter(is.finite(value))

summary_stats <- bimli_long_main_dvs %>%
  group_by(idx, treatment_label) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    se = sd(value, na.rm = TRUE) / sqrt(n()),
    n = n(),
    .groups = "drop"
  ) %>%
  mutate(
    ci = 1.96 * se,
    ymin = mean - ci,
    ymax = mean + ci
  )


### Plot ----
main_violin_plot <- ggplot(bimli_long_main_dvs, aes(x = "0", y = value, fill = treatment_label)) +
  geom_split_violin_custom(trim = FALSE, alpha = 0.5) +
  geom_boxplot(width = 0.2, alpha = 0.7, outlier.shape = NA) +
  scale_fill_manual(values = c("Treatment" = "#ea801c", "Control" = "#2066a8")) +
  labs(x = NULL, y = "Index Score") +
  facet_wrap(~ idx, scales = "free_y") +
  theme_bw() %+replace%
  theme(
    axis.text.x = element_blank(),
    axis.title.y = element_text(size = 12, angle = 90),
    panel.grid.major.x = element_blank(),
    legend.position = "none",
    strip.text = element_text(size = 12, margin = margin(t = 3, b = 3)),
    strip.background = element_rect(linewidth = 0.5, fill = "grey90")
  )

helper_violin_plot <- ggplot(bimli_long_main_dvs, aes(x = treatment_label, y = value, fill = treatment_label)) +
  geom_boxplot() +
  scale_fill_manual(values = c("Treatment" = "#ea801c", 
                               "Control" = "#2066a8")) +
  theme(
    legend.position = "bottom",
    legend.key = element_rect(fill = NA, colour = NA),
    legend.key.size = unit(1, "cm"),      # size of legend keys (squares)
    legend.text = element_text(size = 10),  # size of legend labels
    legend.title = element_text(size = 12)  # size of legend title (if used)
  ) +
  labs(fill = "")

# Extract legend only
legend_violin_plot <- get_legend_custom(helper_violin_plot)

final_violin_plot <- ggdraw() +
  draw_plot(main_violin_plot, 0, 0, 1, 1) + # fill whole canvas
  draw_plot(legend_violin_plot,x = 0.5, y = 0.07, width = 0.33, height = 0.2)
final_violin_plot

ggsave(plot = final_violin_plot, 
       filename = "output/figures/violin_plot.pdf", 
       height = 5, width = 8)

# ----
# Follow-up mechanisms ----
## Reaction to misinfo ----
follow_q7_tab <- bimli %>%
  group_by(treatment) %>%
  summarize(
    `Emphasize that it is false` = mean(follow_misinfo_react_sd_explain, na.rm=T),
    `Teach strategy to verify` = mean(follow_misinfo_react_sd_teach_strategy, na.rm=T),
    `Admonish for spreading false info` = mean(follow_misinfo_react_sd_admonish, na.rm=T),
    `Emphasize not sharing` = mean(follow_misinfo_react_sd_dont_share, na.rm=T))

follow_q7_tab <- follow_q7_tab %>%
  pivot_longer(cols = c(`Emphasize that it is false`, 
                        `Teach strategy to verify`, 
                        `Admonish for spreading false info`, 
                        `Emphasize not sharing`), 
               names_to = "Reaction", 
               values_to = "Proportion") %>%
  pivot_wider(names_from = treatment, values_from = Proportion) %>%
  mutate(
    `Spoken English` = round(`Spoken English`, 2),
    `Media Literacy` = round(`Media Literacy`, 2)
  )

follow_q7_tab$statistic <- c(
  t.test(filter(bimli, treatment=="Media Literacy")$follow_misinfo_react_sd_explain,
         filter(bimli, treatment=="Spoken English")$follow_misinfo_react_sd_explain)$statistic,
  t.test(filter(bimli, treatment=="Media Literacy")$follow_misinfo_react_sd_teach_strategy,
         filter(bimli, treatment=="Spoken English")$follow_misinfo_react_sd_teach_strategy)$statistic,
  t.test(filter(bimli, treatment=="Media Literacy")$follow_misinfo_react_sd_admonish,
         filter(bimli, treatment=="Spoken English")$follow_misinfo_react_sd_admonish)$statistic,
  t.test(filter(bimli, treatment=="Media Literacy")$follow_misinfo_react_sd_dont_share,
         filter(bimli, treatment=="Spoken English")$follow_misinfo_react_sd_dont_share)$statistic)

follow_q7_tab$`p-value` <- c(
  t.test(filter(bimli, treatment=="Media Literacy")$follow_misinfo_react_sd_explain,
         filter(bimli, treatment=="Spoken English")$follow_misinfo_react_sd_explain)$p.value,
  t.test(filter(bimli, treatment=="Media Literacy")$follow_misinfo_react_sd_teach_strategy,
         filter(bimli, treatment=="Spoken English")$follow_misinfo_react_sd_teach_strategy)$p.value,
  t.test(filter(bimli, treatment=="Media Literacy")$follow_misinfo_react_sd_admonish,
         filter(bimli, treatment=="Spoken English")$follow_misinfo_react_sd_admonish)$p.value,
  t.test(filter(bimli, treatment=="Media Literacy")$follow_misinfo_react_sd_dont_share,
         filter(bimli, treatment=="Spoken English")$follow_misinfo_react_sd_dont_share)$p.value)

follow_q7_tab <- 
  follow_q7_tab %>%
  as.data.frame() %>% 
  mutate(
    `p-value` = case_when(
      `p-value` < 0.001 ~ "< 0.001",
      TRUE ~ as.character(round(`p-value`, 2))),
    statistic = round(statistic, 2))

follow_q7_tab_tex <- xtable(follow_q7_tab,
                            caption = "Reactions to misinformation",
                            label = "tab:followup-reactions")
                            
print(follow_q7_tab_tex, 
      caption.placement = "top",
      include.rownames=FALSE,
      file = "output/tables/tab_follow_reactions.tex", 
      type = "latex")

## Parents' reason for sending kids to program ----
# Create frequency table
follow_q1_guardian_tab <- table(bimli$follow_guardian_reason_send_child) %>%
  as.data.frame()

# Rename columns
names(follow_q1_guardian_tab) <- c("Reason", "Freq")

# Add percentages
follow_q1_guardian_tab <- follow_q1_guardian_tab %>%
  mutate(Percent = 100 * Freq / sum(Freq)) %>%
  arrange(desc(Percent))  

# Select and reorder columns for LaTeX
follow_q1_guardian_tab <- follow_q1_guardian_tab %>%
  select(Reason, Percent)

# Round percentages
follow_q1_guardian_tab$Percent <- round(follow_q1_guardian_tab$Percent, 2)

# Create xtable
follow_q1_guardian_tab_tex <- xtable(
  follow_q1_guardian_tab,
  caption = "Parents' reasons for sending child to classes",
  label = "tab:followup-parents",
  align = c("r", "l", "r") 
)

# Print to file
print(
  follow_q1_guardian_tab_tex,
  file = "output/tables/tab_follow_parents_reason.tex",
  type = "latex",
  include.rownames = FALSE,
  sanitize.text.function = identity
)

# END of 08_descriptives.R ----